import os
from jieba.analyse import ChineseAnalyzer
from whoosh.fields import SchemaClass, TEXT, KEYWORD, ID, STORED, Schema
from whoosh.index import create_in
import re
from whoosh.qparser import MultifieldParser, QueryParser


# Load the raw poem dump into memory, one list entry per physical line
# (line endings are kept). The context manager guarantees the file handle
# is closed even if reading or UTF-8 decoding fails part-way through.
with open("poem.txt", "r", encoding='utf-8') as file:
    lines = file.readlines()

# One list per poem field. They are filled independently here and later read
# position-by-position by the indexing code, so entry k of each list is
# expected to belong to the same poem.
titles = []
authors = []
dynasties = []
contents = []

# Each pattern captures the single-quoted value that follows one key of the
# dumped records; everything else on the line (explanations etc.) is ignored.
# Note that every pattern except the title one requires a leading space
# before the key.
_field_extractors = (
    (titles, re.compile(r'\'title\': \'(.*?)\'', re.DOTALL)),
    (authors, re.compile(r' \'authors\': \'(.*?)\'', re.DOTALL)),
    (dynasties, re.compile(r' \'dynasties\': \'(.*?)\'', re.DOTALL)),
    # Used for dumps where the poem body was crawled together with the
    # metadata; this yields the poem text itself.
    (contents, re.compile(r' \'contents\': \'(.*?)\'', re.DOTALL)),
)

# Walk the dump line by line and collect every captured value, in order.
for raw_line in lines:
    for bucket, pattern in _field_extractors:
        bucket.extend(pattern.findall(raw_line))


# Analyzer from jieba.analyse, shared by every text field of the schema so
# that all fields are tokenized the same way.
analyzer = ChineseAnalyzer()
class PoemSchema(SchemaClass):
    """Whoosh schema describing one poem as four analyzed text fields."""
    # stored=True keeps each original value with the document, so it can be
    # read back from a search hit.
    title = TEXT(stored=True,analyzer=analyzer)
    authors=TEXT(stored=True,analyzer=analyzer)
    dynasties=TEXT(stored=True,analyzer=analyzer)
    contents = TEXT(stored=True,analyzer=analyzer)

# Schema object handed to the index-creation code below.
schema = PoemSchema()

# Make sure the on-disk index directory exists; exist_ok avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs("index", exist_ok=True)

# Create the named poem index inside that directory. Only this index is
# created: an extra default-named index would be unused, because this handle
# is the one every later statement works with.
ix = create_in("./index", schema, indexname='poem_index')

# The writer is used as a context manager so that it is committed on success
# and cancelled (releasing the index write lock) if adding a document fails.
with ix.writer() as writer:
    # Index by position rather than zip() on purpose: if one of the parallel
    # lists is shorter than `titles`, this raises IndexError instead of
    # silently dropping poems.
    for idx, title in enumerate(titles):
        writer.add_document(
            title=title,
            authors=authors[idx],
            dynasties=dynasties[idx],
            contents=contents[idx],
        )

# enter = str(input("请输入想要搜索的内容："))

def Searching(enter):
    """Search the poem index and return the stored fields of every hit.

    The query text is run twice against the module-level index ``ix``: once
    across the ``title``, ``authors`` and ``dynasties`` fields, and once
    against ``contents``. The second result set is merged onto the end of the
    first with ``Results.extend``. No explicit limit is passed, so each
    search uses Whoosh's default result limit.

    Args:
        enter: Query text, parsed by Whoosh's query parsers.

    Returns:
        A list of dicts, one per hit in result order, each with the keys
        ``'title'``, ``'authors'``, ``'dynasties'`` and ``'contents'``
        mapped to the values stored in the index.
    """
    wanted = ('title', 'authors', 'dynasties', 'contents')
    with ix.searcher() as searcher:
        meta_query = MultifieldParser(["title", 'authors', 'dynasties'], ix.schema).parse(enter)
        results = searcher.search(meta_query)
        content_query = QueryParser("contents", ix.schema).parse(enter)
        results.extend(searcher.search(content_query))

        outs = []
        for hit in results:
            # Read the stored values directly. Parsing str(hit) with regexes
            # would return the repr-escaped text (doubled backslashes) and
            # lose any hit whose values contain an apostrophe.
            stored = hit.fields()
            # Only emit hits that carry all four fields, so every returned
            # dict has the same shape.
            if all(key in stored for key in wanted):
                outs.append({key: stored[key] for key in wanted})

        return outs
